1. Download and Clean refugee employment data
- Data downloaded as individual Excel workbooks (one per reporting month) from: https://www.sem.admin.ch/sem/de/home/publiservice/statistik/asylstatistik/archiv.html
- The code below was used to extract the total number of refugees, the
fraction of working age and the percentage employed for the top 10 most
frequent refugee nationalities with B permits
- Most frequent nationalities were: Eritrean, Ethiopian, Sri Lankan,
Turkish, Somali, Afghan, Iraqi, Iranian, Chinese, Syrian, Colombian and
Stateless (probably mostly Palestinian)
- Data does not include Ukrainians with S permits
# Extract cantonal employment data for the 10 most common refugee nationalities ----
# For every downloaded workbook, find the most common nationalities on the
# national sheet, then pull the employment figures for those nationalities
# from each canton's worksheet and stack everything into one table.

# Canton abbreviations; each one is used as a worksheet name in every workbook
canton_list <- c(
  "AG", "AI", "AR", "BE", "BL", "BS", "FR", "GE", "GL", "GR", "JU", "LU",
  "NE", "NW", "OW", "SG", "SH", "SO", "SZ", "TG", "TI", "UR", "VD", "VS",
  "ZH", "ZG"
)

# List of downloaded data files
file_list <- list.files(path = "Raw Data/", full.names = TRUE)

# Loop-invariant settings, defined once instead of once per file
sheet_nationality <- "CH-Nati"
# Regional aggregates and "unknown"/"stateless" rows that must not be ranked
# as nationalities
remove <- c(
  "Staat unbekannt", "Ohne Nationalität", "Afrika", "Nordafrika",
  "Subsahara", "Amerika", "Asien", "Europa", "Herkunft unbekannt"
)

# One data frame per (file, canton) is collected here and combined once after
# the loops; growing a data frame with rbind() on every iteration re-copies
# all previously collected rows each time.
canton_pieces <- list()

for (file in file_list) {
  # Extract the reporting month (YYYY-MM) from the file name
  date_data <- str_extract(file, "\\d{4}-\\d{2}")

  # Read the national table and give its five columns meaningful names
  nationality_data <- read_excel(
    file,
    sheet = sheet_nationality,
    range = "A5:E129"
  )
  renamed_data_nationality <- nationality_data %>%
    rename(
      Country = ...1,
      Total = ...2,
      Employment_Age = ...3,
      Employed = ...4,
      Employed_percent = ...5
    )

  # Remove all regional totals. The pattern "Total" is case-sensitive, so the
  # grand-total row "Gesamttotal" is kept on purpose.
  nationality_cleaned <- renamed_data_nationality %>%
    filter(!str_detect(Country, "Total")) %>%
    filter(!Country %in% remove)

  # Keep the 11 largest rows: the grand total plus the ten largest
  # nationalities in this file
  numbers_nationality <- nationality_cleaned %>%
    select(Country, Total) %>%
    arrange(desc(Total)) %>%
    head(11)
  country_list <- numbers_nationality$Country

  for (canton in canton_list) {
    # The first column of each canton sheet is read with the header `6-23`
    # NOTE(review): presumably the table number printed in cell A1 - confirm
    # that it is identical in every workbook.
    canton_data <- read_excel(file, sheet = canton) %>%
      filter(`6-23` %in% country_list)

    # Keep only the first 5 columns containing employment data
    canton_summary <- canton_data[1:5] %>%
      mutate(Canton = canton)

    renamed_canton <- canton_summary %>%
      rename(
        Country = `6-23`,
        Total = ...2,
        Employment_Age = ...3,
        Employed = ...4,
        Employed_percent = ...5
      )

    # Add the reporting month to the extracted employment data
    renamed_canton_date <- renamed_canton %>%
      mutate(Date = date_data)

    canton_pieces[[length(canton_pieces) + 1L]] <- renamed_canton_date
  }
}

# Combine data from all worksheets and workbooks into a single data frame.
# rbind() semantics are kept deliberately, and the leading empty data.frame()
# mirrors the original empty accumulator (it also covers the no-files case).
canton_employment_df <- do.call(rbind, c(list(data.frame()), canton_pieces))

rio::export(canton_employment_df, "Nationality and Employment3.csv")
## Country Total Employment_Age Employed Employed_percent Canton Date
## 1 Gesamttotal 889 584 95 0.16267123287671234 AG 2013-01
## 2 Äthiopien 5 4 0 0 AG 2013-01
## 3 Eritrea 635 408 59 0.14460784313725492 AG 2013-01
## 4 Somalia 11 9 2 0.22222222222222221 AG 2013-01
## 5 Kolumbien 22 9 1 0.1111111111111111 AG 2013-01
2. Plot total number of refugees and employment rate over time
(2013-2023)
library(plotly)
library(rio)
library(zoo)
# Import the combined employment table from the parent directory.
# file.path() is used so the path also resolves on non-Windows systems.
employment_data <- rio::import(
  file.path("..", "Nationality and Employment3.csv")
)

# Clean data and correct types
cleaned_data <- employment_data %>%
  mutate(
    # Date holds year and month only; append a day so it parses as a datetime
    Date = as.POSIXct(paste0(Date, "-01"), format = "%Y-%m-%d"),
    # Convert percent to numeric, not character; entries that are not numbers
    # become NA (with a coercion warning)
    Employed_percent = as.numeric(Employed_percent)
  )
# Plot line chart showing employment by nationality in Switzerland over time

# Create data frame containing plot data: the mean of the cantonal employment
# rates for every month and nationality, plus a 3-row rolling mean.
# The rolling mean is computed within each nationality, ordered by date; a
# window run over the whole table would average neighbouring rows that belong
# to different countries instead of consecutive months.
employment_time_data <- cleaned_data %>%
  group_by(Date, Country) %>%
  summarise(
    Employment_average = mean(Employed_percent, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(Country, Date) %>%
  group_by(Country) %>%
  mutate(
    # Series shorter than the window cannot be smoothed; return NA for them
    rolling_mean_employment = if (length(Employment_average) >= 3) {
      zoo::rollmean(Employment_average, k = 3, fill = NA)
    } else {
      rep(NA_real_, length(Employment_average))
    }
  ) %>%
  ungroup()

# Plot time series showing employment from 2013-2023, one line per nationality
employment_plot <- employment_time_data %>%
  group_by(Country) %>%
  plot_ly(x = ~Date, y = ~rolling_mean_employment) %>%
  add_lines(color = ~Country) %>%
  layout(
    title = "Employment by Most Common Refugee Nationalities",
    yaxis = list(title = "Average employment", tickformat = ".1%")
  )
# Line chart: number of refugees in Switzerland granted B permits over time,
# one line per nationality

# Plot data: drop the grand-total row, then add up the cantonal counts for
# every month and nationality (the mean employment rate is carried along)
number_time_data <- ungroup(
  summarise(
    group_by(
      filter(cleaned_data, Country != "Gesamttotal"),
      Date, Country
    ),
    Employment_average = mean(Employed_percent, na.rm = TRUE),
    Total = sum(Total)
  )
)

# Time series of the number of refugees from 2013-2023
number_plot_layout <- list(
  title = "Number of Refugees from most common countries of origin (2013-2023)",
  yaxis = list(title = "Number of refugees")
)
number_plot <- plot_ly(group_by(number_time_data, Country), x = ~Date, y = ~Total) %>%
  add_lines(color = ~Country) %>%
  layout(title = number_plot_layout$title, yaxis = number_plot_layout$yaxis)
- Number of refugees increases dramatically from 2014 onwards with the
vast majority from Eritrea
- The Syrian civil war started in 2011; there is presumably a time lag
between the greatest numbers of refugees leaving Syria in 2012-2014 and
them being recognised as refugees and granted B permits
- The war in Sri Lanka officially ended in 2009, but the government
has continued to persecute
the minority Tamil and Muslim population under the Prevention of
Terrorism Act.
- Number of Turkish refugees increased after the attempted coup against
Turkish President Erdogan in July 2016
- Employment percentage ranges between 12% and 55% for people aged
18-64 (Data has been smoothed using 3-month running average)
- Need to check precise definition of unemployment, does it include
people in education? Working part time? Training?
- Generally increases over time, presumably as refugees are granted
working rights and find jobs.
- China, Afghanistan, Eritrea and Somalia have the highest employed
percentage, while Turkey and Ethiopia have the lowest. Is this a
consequence of how long refugees have been in the country (Eritreans
have been arriving in large numbers since before 2013 while most Turkish
refugees came after 2019) or is there another reason?
3. Plot total number of refugees and employment rate over time by
canton (2013-2023)
library(sf)
library(tidyverse)
library(plotly)
library(rio)
library(rjson)
library(crosstalk)
# 1. Import Employment and Geo Data ---------------------------------------
# Swiss map data downloaded from: https://cartographyvectors.com/map/1522-switzerland-with-regions

# Import data. Paths are built with file.path() instead of mixing "\\" and "/"
# so they resolve on every operating system.
employment_data <- rio::import(
  file.path("..", "Nationality and Employment3.csv")
)
# Lookup table that matches canton names and their abbreviations
cantons_abbreviations <- rio::import(
  file.path("..", "Geographic Data", "Cantons and Abbreviations.xlsx")
)
# Canton boundaries as a simple-features data frame
boundaries_df <- sf::st_read(
  file.path("..", "Geographic Data", "Switzerland boundaries.geojson")
)
## Reading layer `Switzerland boundaries' from data source
## `C:\Users\amand\OneDrive\Bureau\Refugee Employment Data Project\Refugee-Employment-Switzerland\Geographic Data\Switzerland boundaries.geojson'
## using driver `GeoJSON'
## Simple feature collection with 26 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 5.958 ymin: 45.819 xmax: 10.493 ymax: 47.81
## Geodetic CRS: WGS 84
# The same boundaries file parsed as a plain list, the form passed to the
# choropleth traces as `geojson`
boundaries_json <- rjson::fromJSON(
  file = file.path("..", "Geographic Data", "Switzerland boundaries.geojson")
)

# Clean data and correct types
cleaned_data <- employment_data %>%
  mutate(
    # Date holds year and month only; append a day so it parses as a datetime
    Date = as.POSIXct(paste0(Date, "-01"), format = "%Y-%m-%d"),
    # Entries that are not numbers become NA (with a coercion warning)
    Employed_percent = as.numeric(Employed_percent)
  )

# Join geographic and employment data
# Attach the canton abbreviation to every boundary feature
all_geo_data <- left_join(
  boundaries_df,
  cantons_abbreviations,
  by = c("name" = "Canton")
)
# Attach the canton name and geometry to every employment row
geo_employment <- left_join(
  cleaned_data,
  all_geo_data,
  by = c("Canton" = "Abbreviation")
)
# 2. Plot number of refugees per canton over time -------------------------
# Yearly averages per canton of the grand-total rows: mean employed percentage
# and mean number of refugees. Employed_percent is already numeric at this
# point, so no placeholder handling is needed here.
employment_all_refugees_df <- geo_employment %>%
  filter(Country == "Gesamttotal") %>%
  select(Date, Country, Employed_percent, name, Total) %>%
  # Convert the fraction to a percentage
  mutate(Employed_percent = round(Employed_percent * 100, 2)) %>%
  # The grouping column keeps the name `year(Date)` because the map code
  # refers to it by that name
  group_by(`year(Date)` = year(Date), name) %>%
  summarise(
    Employed_percent = round(mean(Employed_percent, na.rm = TRUE), 1),
    Total = round(mean(Total, na.rm = TRUE), 0),
    .groups = "drop"
  )
# Animated choropleth of the average number of refugees per canton, with one
# frame per year.

# Geo layout: zoom to the plotted cantons and hide the base map
g <- list(
  fitbounds = "locations",
  visible = FALSE
)

swiss_map <- plot_ly(
  hoverinfo = "text",
  text = ~paste(
    "Canton:", employment_all_refugees_df$name, "<br>",
    "Employed:", employment_all_refugees_df$Employed_percent, "%", "<br>",
    "Total:", employment_all_refugees_df$Total, "<br>"
  )
) %>%
  add_trace(
    type = "choropleth",
    geojson = boundaries_json,
    # Canton names are matched against `properties.name` in the GeoJSON
    locations = employment_all_refugees_df$name,
    z = employment_all_refugees_df$Total,
    colorscale = "Blues",
    frame = employment_all_refugees_df$`year(Date)`,
    reversescale = TRUE,
    # Fixed colour range so that all yearly frames share one scale
    zmin = 35,
    zmax = 9590,
    featureidkey = "properties.name"
  ) %>%
  layout(geo = g) %>%
  colorbar(
    thickness = 20,
    #orientation="h",
    len = 0.5
  ) %>%
  layout(title = "Number of Refugees with B Permits (2013-2023)")
# 3. Plot employment of refugees per canton over time -------------------------
# Animated choropleth of the average employed percentage per canton, with one
# frame per year.

# Geo layout: zoom to the plotted cantons and hide the base map
g <- list(
  fitbounds = "locations",
  visible = FALSE
)

swiss_map_employment <- plot_ly(
  hoverinfo = "text",
  text = ~paste(
    "Canton:", employment_all_refugees_df$name, "<br>",
    "Employed:", employment_all_refugees_df$Employed_percent, "%", "<br>",
    "Total:", employment_all_refugees_df$Total, "<br>"
  )
) %>%
  add_trace(
    type = "choropleth",
    geojson = boundaries_json,
    # Canton names are matched against `properties.name` in the GeoJSON
    locations = employment_all_refugees_df$name,
    z = employment_all_refugees_df$Employed_percent,
    colorscale = "Blues",
    frame = employment_all_refugees_df$`year(Date)`,
    reversescale = TRUE,
    # Fixed colour range (in percent) so that all yearly frames share one scale
    zmin = 20,
    zmax = 70,
    featureidkey = "properties.name"
  ) %>%
  layout(geo = g) %>%
  colorbar(
    thickness = 20,
    #orientation="h",
    len = 0.5
  ) %>%
  layout(title = "Percentage of employed refugees with B permits (2013-2023)")
- Number of refugees with B permits has increased dramatically since
2013. In 2023, Bern, Vaud and Zurich hosted the highest number of refugees,
presumably because they have the largest populations.
- Are the cantons or the federal government responsible for processing
asylum applications? Is it possible some cantons are more likely to
recognise refugees than others? Could that partly explain cantonal
differences?
- In 2023 there were 3120 refugees with B permits compared to 529 in
2013. With so few refugees does it make sense to investigate medical
record data before 2013? There would presumably be much larger numbers
with N and F permits though, perhaps these numbers should be
checked.
- Would be interesting to look at nationality distribution of
individual cantons.
- Refugee employment has increased steadily over time, with the highest
employment in Glarus, Uri and Zurich in 2023. Employment is lowest in
the French-speaking region and Ticino.
- Graph is only meaningful after 2017, when there are a large number of
refugees, and only for the larger cantons; Uri, Glarus etc. only have a
few hundred asylum seekers even in 2023.
- Would be interesting to explore the employment rate in different
cantons by nationality. Is it easier for refugees of certain
nationalities to find employment in some cantons than others?
4. Comparison with unemployment in Switzerland (2013-2023)
library(tidyverse)
library(rio)
# Source: https://www.bfs.admin.ch/asset/en/ts-x-40.02.03.02.03
# (Structural Survey, population aged 15-64)

# Load the survey table, the canton code lookup and the canton boundaries
# NOTE(review): the survey file is read from a user-specific absolute path;
# it will not be found on another machine.
swiss_employment <- rio::import("C:\\Users\\amand\\Downloads\\ts-x-40.02.03.02.03.csv")
canton_links <- rio::import("..\\Canton Links.xlsx")
boundaries_json <- rjson::fromJSON(file="..\\Geographic Data/Switzerland boundaries.geojson")

# Keep the percentage rows for ERWL code 1 and only the columns used later
# NOTE(review): ERWL == 1 presumably selects the unemployed - confirm against
# the dataset's codebook.
unemployment_data <- select(
  filter(swiss_employment, ERWL == 1, UNIT_MEA == "pers in %"),
  TIME_PERIOD, GEO, OBS_VALUE, OBS_STATUS
)

# Attach the English canton labels and drop the national row
unemployment_canton_df <- unemployment_data %>%
  left_join(canton_links, by = c("GEO" = "CODE")) %>%
  filter(LABEL_EN != "Switzerland")
# Plot data on choropleth map: unemployment rate per canton, with one
# animation frame per time period.

# Geo layout: zoom to the plotted cantons and hide the base map
g <- list(
  fitbounds = "locations",
  visible = FALSE
)

swiss_map_3 <- plot_ly(
  hoverinfo = "text",
  # The plotted value is the unemployment rate, so the hover label says so
  text = ~paste(
    "Canton:", unemployment_canton_df$LABEL_EN, "<br>",
    "Unemployed:", unemployment_canton_df$OBS_VALUE, "%", "<br>"
  )
) %>%
  add_trace(
    type = "choropleth",
    geojson = boundaries_json,
    # Canton labels are matched against `properties.name` in the GeoJSON
    locations = unemployment_canton_df$LABEL_EN,
    z = unemployment_canton_df$OBS_VALUE,
    frame = unemployment_canton_df$TIME_PERIOD,
    colorscale = "Blues",
    reversescale = TRUE,
    # Fixed colour range (in percent) so that all frames share one scale
    zmin = 0,
    zmax = 15,
    featureidkey = "properties.name"
  ) %>%
  layout(geo = g) %>%
  colorbar(
    thickness = 20,
    #orientation="h",
    len = 0.5
  ) %>%
  layout(title = "Unemployment in Switzerland")

swiss_map_3
- Ticino and the French-speaking region have the highest unemployment
rates